function data = create_synthetic_dataset(data)
%CREATE_SYNTHETIC_DATASET Generate a synthetic point cloud with known latent coordinates.
%
%   DATA = CREATE_SYNTHETIC_DATASET(NUMBER) generates data set NUMBER using
%   default settings.
%
%   DATA = CREATE_SYNTHETIC_DATASET(DATA) takes a struct whose field
%   'dataset' holds the selector; the remaining fields are optional.
%
%   Input fields:
%     dataset  selector (required), one of:
%                 0  swiss roll with hole
%                -1  uniform swiss roll
%                 1  classic swiss roll
%                11  undersampled swiss roll (n is forced to 100)
%                12  classic swiss roll      (n is forced to 400)
%                 2  scurve
%                 3  square (2-D square mapped linearly into D dimensions)
%                 4  spiral
%                -4  noisy spiral
%                 5  hole
%                 6  P (points loaded from the file 'x'; n is taken from it)
%                 7  fishbowl (uniform in data space)
%                 8  fishbowl (uniform in embedding space)
%                 9  gaussian blob
%     n        number of points                          (default 400)
%     noise    variance of the additive Gaussian noise   (default 0.0)
%     state    seed passed to rand('state') / randn('state') (default 0)
%     D        data set 3 only: ambient dimension        (default 3)
%
%   Output fields (in addition to the inputs):
%     typ          always 'data'
%     name         descriptive name of the data set
%     x            points, one per column, with noise added
%     x_noisefree  the same points before noise was added
%     e            the standard-normal noise realisation (same size as x)
%     z            latent coordinates, one column per point
%     col          one scalar per point (presumably a plotting colour)
%     az, el       set for some data sets only (presumably view angles)
%     D            number of rows of x
%     distances    result of distanz(data.x)
%
%   Requires the external function DISTANZ on the path.

% ---------------------------------------------------------------------
% Argument handling and defaults
% ---------------------------------------------------------------------
if ~isfield(data, 'dataset')
    if isstruct(data)
        % Previously this fell through to a cryptic SWITCH error.
        error('create_synthetic_dataset:missingDataset', ...
              'The input struct must contain the field ''dataset''.');
    end
    % A bare selector was passed: wrap it into a struct.
    number = data;
    clear data
    data.dataset = number;
end
if ~isfield(data, 'n'),     data.n     = 400; end
if ~isfield(data, 'noise'), data.noise = 0.0; end
if ~isfield(data, 'state'), data.state = 0;   end

% Legacy seeding syntax is kept deliberately: switching to rng() would
% change the generated numbers and break reproducibility of old results.
rand('state', data.state);
randn('state', data.state);

data.typ = 'data';

% ---------------------------------------------------------------------
% Data set generation
% NOTE: the order of rand/randn calls inside each case determines the
% generated points for a given seed; do not reorder them.
% ---------------------------------------------------------------------
switch data.dataset
    case 0
        data.name = 'swiss roll with hole';
        n = data.n;
        a = 1;
        b = 4;
        y = rand(2, n);

        % Reject points inside the rectangle |y1|<=l1, |y2|<=l2 around the
        % centre of the unit square and redraw them one at a time.
        l1 = 0.05; l2 = 0.15;
        y = y - 0.5;
        ok = find((abs(y(1,:)) > l1) | (abs(y(2,:)) > l2));
        i = length(ok);
        y(:, 1:i) = y(:, ok);
        while (i < n)
            p = rand(2, 1) - 0.5;
            if (abs(p(1)) > l1) || (abs(p(2)) > l2)
                i = i + 1;
                y(:, i) = p;
            end
        end
        y = y + 0.5;

        tt = (b - a)*y(1,:) + a;
        tt = pi*tt;
        height = 21*y(2,:);
        data.col = tt;
        data.x = [tt.*cos(tt); height; tt.*sin(tt)];
        data.z = [tt; height];
        data.az = -4;
        data.el = 13;

    case -1
        data.name = 'uniform swiss roll';
        n = data.n;
        a = 1;
        b = 4;
        y = rand(2, n);
        data.z = y;
        % Square-root warping of the uniform sample; the unwarped
        % alternative is tt = (b-a)*y(1,:) + a, as used by data set 1.
        tt = sqrt((b*b - a*a)*y(1,:) + a*a);
        tt = pi*tt;
        height = 21*y(2,:);
        data.col = tt;
        data.x = [tt.*cos(tt); height; tt.*sin(tt)];
        data.az = -4;
        data.el = 13;

    case {1, 11, 12}
        % Same construction for all three; 11 and 12 only pin n.
        if data.dataset == 11
            data.name = 'undersampled swiss roll';
            data.n = 100;
        elseif data.dataset == 12
            data.name = 'classic swiss roll';
            data.n = 400;
        else
            data.name = 'classic swiss roll';
        end
        n = data.n;
        a = 1;
        b = 4;
        y = rand(2, n);
        tt = (b - a)*y(1,:) + a;
        tt = pi*tt;
        height = 21*y(2,:);
        data.col = tt;
        data.x = [tt.*cos(tt); height; tt.*sin(tt)];
        data.z = [tt; height];
        data.az = -4;
        data.el = 13;

    case 2
        data.name = 'scurve';
        n = data.n;
        nFirst  = ceil(n/2);     % points on the first arc
        nSecond = floor(n/2);    % points on the second arc

        angle  = pi*(1.5*rand(1, nFirst) - 1);
        height = 5*rand(1, n);
        % The second arc reuses the first nSecond angles. Using the same
        % slice in every row keeps all rows at n columns when n is odd
        % (the original mixed n and 2*ceil(n/2) columns and errored).
        angle2 = angle(1:nSecond);
        data.x = [[cos(angle), -cos(angle2)]; ...
                  height; ...
                  [sin(angle), 2 - sin(angle2)]];
        data.col = [angle, 1.5*pi + angle2];
        data.z = [angle, 1.5*pi + angle2; height];

    case 3
        data.name = 'square';
        n = data.n;
        d = 2;   % latent dimension

        if ~isfield(data, 'D'), data.D = 3; end

        % Leading d eigenvectors of a random symmetric matrix A*A'
        % give the D-by-d linear map applied to the latent points.
        D = data.D;
        A = randn(D, D);
        options.disp = 0;
        [R, dummy] = eigs(A*A', d, 'LM', options); %#ok<ASGLU>
        tt = rand(d, n);
        data.col = tt(1,:);
        data.x = R*tt;
        data.z = tt;
        data.az = 7;
        data.el = 40;

    case 4
        data.name = 'spiral';
        n = data.n;
        tt = (3*pi/2)*(1 + 2*rand(1, n));
        data.col = tt;
        data.x = [tt.*cos(tt); tt.*sin(tt)];
        data.z = tt;

    case -4
        data.name = 'noisy spiral';
        n = data.n;
        tt = (3*pi/2)*(1 + 2*rand(1, n));
        data.col = tt;
        data.x = [tt.*cos(tt); tt.*sin(tt)];
        % Unit-variance noise baked into the "noise free" points; this is
        % independent of, and in addition to, data.noise applied below.
        data.x = data.x + randn(size(data.x));
        data.z = tt;

    case 5
        data.name = 'hole';
        n = data.n;
        data.x = rand(2, n) - 0.5;

        % Reject points inside the central square and redraw them.
        l1 = 0.2; l2 = 0.2;
        ok = find((abs(data.x(1,:)) > l1) | (abs(data.x(2,:)) > l2));
        i = length(ok);
        data.x(:, 1:i) = data.x(:, ok);
        while (i < n)
            p = rand(2, 1) - 0.5;
            if (abs(p(1)) > l1) || (abs(p(2)) > l2)
                i = i + 1;
                data.x(:, i) = p;
            end
        end
        data.col = data.x(2,:);
        data.z = data.x;

    case 6
        data.name = 'P';
        % Loads a variable named x from the file 'x' on the path.
        % NOTE(review): assumes x has one point per column and at least
        % two rows -- confirm against the data file.
        load x
        x(2,:) = 500 - x(2,:);
        data.x = x;
        data.z = x;
        data.col = data.z(2,:);
        data.n = size(x, 2);

    case 7
        gamma = 0.8;
        data.name = 'fishbowl (uniform in data space)';
        n = data.n;
        data.x = rand(3, n) - 0.5;

        % Project onto the unit sphere, then keep only points whose third
        % coordinate lies below gamma; rejected points are redrawn.
        data.x = data.x ./ repmat(sqrt(sum(data.x.*data.x, 1)), [3 1]);
        ok = find(data.x(3,:) < gamma);
        i = length(ok);
        data.x(:, 1:i) = data.x(:, ok);
        while (i < n)
            p = rand(3, 1) - 0.5;
            p = p / sqrt(p'*p);
            if (p(3) < gamma)
                i = i + 1;
                data.x(:, i) = p;
            end
        end

        % Latent coordinates: (x1, x2) / (1 - x3).
        data.z = data.x(1:2,:) ./ repmat(1 - data.x(3,:), [2 1]);
        data.col = data.x(3,:);
        data.az = -18;
        data.el = 16;

    case 8
        data.name = 'fishbowl (uniform in embedding space)';
        n = data.n;
        data.z = rand(2, n) - 0.5;

        % Keep points inside the disc of radius 0.5; redraw the rest.
        ok = find(sum(data.z .* data.z) <= 0.25);
        i = length(ok);
        data.z(:, 1:i) = data.z(:, ok);
        while (i < n)
            p = rand(2, 1) - 0.5;
            if (p'*p <= 0.25)
                i = i + 1;
                data.z(:, i) = p;
            end
        end
        gamma = 0.8;
        % Rescale the disc to radius sqrt((1+gamma)/(1-gamma)).
        data.z = 2*sqrt((1 + gamma)/(1 - gamma))*data.z;

        % Map the planar points onto the unit sphere.
        alpha = 2 ./ (1 + sum(data.z .* data.z, 1));
        data.x = [repmat(alpha, [2 1]).*data.z; zeros(1, n)];
        data.x(3,:) = 1 - alpha;
        data.col = data.x(3,:);
        data.az = -18;
        data.el = 16;

    case 9
        data.name = 'gaussian blob';
        n = data.n;
        data.x = randn(3, n);
        data.z = data.x(2:3,:);
        data.col = data.x(3,:);

    otherwise
        % Without this branch an unknown selector failed later with a
        % misleading "non-existent field 'x'" error.
        error('create_synthetic_dataset:unknownDataset', ...
              'Unknown dataset selector: %s', mat2str(data.dataset));
end

% ---------------------------------------------------------------------
% Common post-processing
% ---------------------------------------------------------------------
data.D = size(data.x, 1);

% Additive isotropic Gaussian noise with variance data.noise. The noise
% realisation is always drawn (and stored) so that the RNG stream does
% not depend on the noise level.
data.e = randn(size(data.x));
data.x_noisefree = data.x;
data.x = data.x_noisefree + sqrt(data.noise)*data.e;

% distanz is an external helper (not defined in this file).
data.distances = distanz(data.x);